{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 使用Python下载网页中影像。\n",
    "\n",
    "使用Jupyter进行交互式的网页分析，提取其中的图片标签列表，然后下载到服务器的本地目录。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# coding:utf-8\n",
    "# 引入requests包和正则表达式包re\n",
    "import requests\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 自定义下载页面函数\n",
    "def load_page(url):\n",
    "    response=requests.get(url)\n",
    "    data=response.content\n",
    "    return data\n",
    " \n",
    "# 自定义保存页面图片函数\n",
    "def get_image(html):\n",
    "    regx=r'http://[\\S]*jpg'  # 定义图片正则表达式\n",
    "    pattern=re.compile(regx) # 编译表达式构造匹配模式\n",
    "    get_images=re.findall(pattern,repr(html)) # 在页面中匹配图片链接\n",
    "    \n",
    "    print(\"发现jpg图像,共计：\",len(get_images))\n",
    "    \n",
    "    num=1\n",
    "    # 遍历匹配成功的链接\n",
    "    for img in  get_images:\n",
    "        image=load_page(img) #根据图片链接，下载图片链接\n",
    "        # 将下载的图片保存到对应的文件夹中\n",
    "        with open('/home/jovyan/quickstart/images/%s.jpg' % num,'wb') as fb:\n",
    "            fb.write(image)\n",
    "            print(\"正在下载第%s张图片\" %num)\n",
    "            num=num+1\n",
    "    print(\"下载完成！\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "32\n",
      "正在下载第1张图片\n",
      "正在下载第2张图片\n",
      "正在下载第3张图片\n",
      "正在下载第4张图片\n",
      "正在下载第5张图片\n",
      "正在下载第6张图片\n",
      "正在下载第7张图片\n",
      "正在下载第8张图片\n",
      "正在下载第9张图片\n",
      "正在下载第10张图片\n",
      "正在下载第11张图片\n",
      "正在下载第12张图片\n",
      "正在下载第13张图片\n",
      "正在下载第14张图片\n",
      "正在下载第15张图片\n",
      "正在下载第16张图片\n",
      "正在下载第17张图片\n",
      "正在下载第18张图片\n",
      "正在下载第19张图片\n",
      "正在下载第20张图片\n",
      "正在下载第21张图片\n",
      "正在下载第22张图片\n",
      "正在下载第23张图片\n",
      "正在下载第24张图片\n",
      "正在下载第25张图片\n",
      "正在下载第26张图片\n",
      "正在下载第27张图片\n",
      "正在下载第28张图片\n",
      "正在下载第29张图片\n",
      "正在下载第30张图片\n",
      "正在下载第31张图片\n",
      "正在下载第32张图片\n",
      "下载完成！\n"
     ]
    }
   ],
   "source": [
    "# 定义爬取页面的链接\n",
    "url ='http://p.weather.com.cn/2017/06/2720826.shtml#p=7'\n",
    "\n",
    "# 调用load_page函数，下载页面内容\n",
    "html = load_page(url)\n",
    "\n",
    "# 在页面中，匹配图片链接，并将图片下载下来，保存到对应文件夹\n",
    "get_image(html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 清除文件\n",
    "!rm /home/jovyan/quickstart/images/*.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
